{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 如何使用PyTorch更方便地实现线性回归的训练\n",
    "%matplotlib inline\n",
    "import torch\n",
    "from IPython import display\n",
    "from matplotlib import pyplot as plt\n",
    "import numpy as np\n",
    "import random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 生成数据集，features是训练数据的特征，labels是标签\n",
    "\n",
    "num_inputs = 2\n",
    "num_examples = 1000\n",
    "true_w = [2, -3.4]\n",
    "true_b = 4.2\n",
    "features = torch.tensor(np.random.normal(0, 1, (num_examples, num_inputs)), dtype=torch.float)\n",
    "labels = true_w[0] * features[:,0] + true_w[1] * features[:,1] + true_b\n",
    "labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()), dtype=torch.float)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([10, 2]) torch.Size([10])\n",
      "tensor([[-0.6343, -0.7834],\n",
      "        [ 1.5825, -0.2511],\n",
      "        [ 1.2278, -0.5677],\n",
      "        [-0.0984, -1.1131],\n",
      "        [-0.8056, -0.3190],\n",
      "        [ 1.5381, -0.9060],\n",
      "        [ 0.1876,  0.5751],\n",
      "        [ 0.3670,  0.5845],\n",
      "        [ 2.8208, -0.3767],\n",
      "        [ 0.6903,  0.6510]]) tensor([ 5.6052,  8.2342,  8.5794,  7.7792,  3.6737, 10.3628,  2.6061,  2.9541,\n",
      "        11.0966,  3.3627])\n"
     ]
    }
   ],
   "source": [
    "# 读取数据\n",
    "# PyTorch提供了data包来读取数据。由于data常用作变量名，将导入的data模块用Data代替\n",
    "\n",
    "import torch.utils.data as Data\n",
    "\n",
    "batch_size = 10\n",
    "# 将训练数据的特征和标签组合\n",
    "dataset = Data.TensorDataset(features, labels)\n",
    "# 随机读取小批量 \n",
    "data_iter = Data.DataLoader(dataset, batch_size, shuffle=True)\n",
    "\n",
    "for X, y in data_iter:\n",
    "    print(X.shape, y.shape)\n",
    "    print(X, y)\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LinearNet(\n",
      "  (linear): Linear(in_features=2, out_features=1, bias=True)\n",
      ")\n"
     ]
    }
   ],
   "source": [
    "# 定义模型\n",
    "# PyTorch提供了大量预定义的层，这使我们只需关注使用哪些层来构造模型\n",
    "# 导入torch.nn模块，该模块定义了大量神经网络的层\n",
    "# nn的核心数据结构是Module，它是一个抽象概念\n",
    "# 既可以表示神经网络中的某个层（layer），也可以表示一个包含很多层的神经网络\n",
    "# 常见的做法是继承nn.Module，撰写自己的网络/层\n",
    "# 一个nn.Module实例应该包含一些层以及返回输出的前向传播（forward）方法\n",
    "\n",
    "from torch import nn\n",
    "\n",
    "# 通过继承 nn.Module\n",
    "class LinearNet(nn.Module):\n",
    "    def __init__(self, n_feature):\n",
    "        super(LinearNet, self).__init__()\n",
    "        self.linear = nn.Linear(in_features=n_feature, out_features=1)\n",
    "    # forward 定义前向传播\n",
    "    def forwar(self, x):\n",
    "        y = self.linear(x)\n",
    "        return y\n",
    "\n",
    "net = LinearNet(num_inputs)\n",
    "print(net) # 使用print可以打印出网络的结构"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sequential(\n",
      "  (linear): Linear(in_features=2, out_features=1, bias=True)\n",
      ")\n",
      "Linear(in_features=2, out_features=1, bias=True)\n"
     ]
    }
   ],
   "source": [
    "# 还可以用nn.Sequential来更加方便地搭建网络，Sequential是一个有序的容器\n",
    "# 网络层将按照在传入Sequential的顺序依次被添加到计算图中\n",
    "\n",
    "# 写法一：\n",
    "net = nn.Sequential(\n",
    "    nn.Linear(num_inputs, 1)\n",
    "    # 此处还可以传入其他层\n",
    ")\n",
    "\n",
    "# 写法二：\n",
    "net = nn.Sequential()\n",
    "net.add_module('linear', nn.Linear(num_inputs, 1))\n",
    "# net.add_module ......\n",
    "\n",
    "# 写法三：\n",
    "from collections import OrderedDict\n",
    "net = nn.Sequential(OrderedDict([\n",
    "    ('linear', nn.Linear(num_inputs, 1))\n",
    "]))\n",
    "\n",
    "print(net)\n",
    "print(net[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Parameter containing:\n",
      "tensor([[ 0.4282, -0.6694]], requires_grad=True)\n",
      "Parameter containing:\n",
      "tensor([-0.6548], requires_grad=True)\n"
     ]
    }
   ],
   "source": [
    "# 可以通过net.parameters()来查看模型所有的可学习参数，此函数将返回一个生成器\n",
    "for param in net.parameters():\n",
    "    print(param)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "作为一个单层神经网络，线性回归输出层中的神经元和输入层中各个输入完全连接。因此，线性回归的输出层又叫全连接层\n",
    "\n",
    "torch.nn仅支持输入一个batch的样本不支持单个样本输入，如果只有单个样本，可使用input.unsqueeze(0)来添加一维"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Parameter containing:\n",
       "tensor([0.], requires_grad=True)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 初始化模型参数\n",
    "# PyTorch在init模块中提供了多种参数初始化方法\n",
    "\n",
    "from torch.nn import init\n",
    "\n",
    "init.normal_(net[0].weight, mean=0, std=0.01)\n",
    "init.constant_(net[0].bias, val=0)\n",
    "# 也可以直接修改bias的data: net[0].bias.data.fill_(0)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 定义损失函数\n",
    "# nn模块中提供了各种损失函数，这些损失函数可看作是一种特殊的层，PyTorch也将这些损失函数实现为nn.Module的子类\n",
    "\n",
    "#均方误差损失\n",
    "loss = nn.MSELoss()\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "SGD (\n",
      "Parameter Group 0\n",
      "    dampening: 0\n",
      "    lr: 0.03\n",
      "    momentum: 0\n",
      "    nesterov: False\n",
      "    weight_decay: 0\n",
      ")\n"
     ]
    }
   ],
   "source": [
    "# 定义优化算法\n",
    "# torch.optim模块提供了很多常用的优化算法比如SGD、Adam和RMSProp等\n",
    "\n",
    "import torch.optim as optim\n",
    "\n",
    "optimizer = optim.SGD(net.parameters(), lr=0.03)\n",
    "print(optimizer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 可以为不同子网络设置不同的学习率\n",
    "# optimizer =optim.SGD([\n",
    "#                 # 如果对某个参数不指定学习率，就使用最外层的默认学习率\n",
    "#                 {'params': net.subnet1.parameters()}, # lr=0.03\n",
    "#                 {'params': net.subnet2.parameters(), 'lr': 0.01}\n",
    "#             ], lr=0.03)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch  1, loss: 0.000162\n",
      "epoch  2, loss: 0.000042\n",
      "epoch  3, loss: 0.000128\n"
     ]
    }
   ],
   "source": [
    "# 训练模型\n",
    "# 通过调用optim实例的step函数来迭代模型参数\n",
    "\n",
    "num_epochs = 3\n",
    "for epoch in range(1, num_epochs + 1):\n",
    "    for X,y in data_iter:\n",
    "        output = net(X)\n",
    "        l = loss(output, y.view(-1, 1))\n",
    "        optimizer.zero_grad() # 梯度清零，等价于net.zero_grad()\n",
    "        l.backward()\n",
    "        optimizer.step()\n",
    "    print('epoch % d, loss: %f' % (epoch, l.item()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2, -3.4] Parameter containing:\n",
      "tensor([[ 1.9993, -3.4011]], requires_grad=True)\n",
      "4.2 Parameter containing:\n",
      "tensor([4.1995], requires_grad=True)\n"
     ]
    }
   ],
   "source": [
    "# 从net获得需要的层，并访问其权重（weight）和偏差（bias）\n",
    "dense = net[0]\n",
    "print(true_w, dense.weight)\n",
    "print(true_b, dense.bias)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "使用PyTorch可以更简洁地实现模型。\n",
    "torch.utils.data模块提供了有关数据处理的工具，torch.nn模块定义了大量神经网络的层，torch.nn.init模块定义了各种初始化方法，torch.optim模块提供了很多常用的优化算法。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "ename": "NotImplementedError",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNotImplementedError\u001b[0m                       Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-20-490d99a25aa3>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m     34\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mepoch\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnum_epochs\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     35\u001b[0m     \u001b[1;32mfor\u001b[0m \u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0my\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mdata_iter\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 36\u001b[1;33m         \u001b[0moutput\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnet\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     37\u001b[0m         \u001b[0ml\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mloss\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mview\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     38\u001b[0m         \u001b[0moptimizer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mzero_grad\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# 梯度清零，等价于net.zero_grad()\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\Anaconda3\\envs\\yczlab_python3.6\\lib\\site-packages\\torch\\nn\\modules\\module.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *input, **kwargs)\u001b[0m\n\u001b[0;32m    530\u001b[0m             \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0minput\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    531\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 532\u001b[1;33m             \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0minput\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    533\u001b[0m         \u001b[1;32mfor\u001b[0m \u001b[0mhook\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_forward_hooks\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    534\u001b[0m             \u001b[0mhook_result\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mhook\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minput\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\Anaconda3\\envs\\yczlab_python3.6\\lib\\site-packages\\torch\\nn\\modules\\module.py\u001b[0m in \u001b[0;36mforward\u001b[1;34m(self, *input)\u001b[0m\n\u001b[0;32m     94\u001b[0m             \u001b[0mregistered\u001b[0m \u001b[0mhooks\u001b[0m \u001b[1;32mwhile\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mlatter\u001b[0m \u001b[0msilently\u001b[0m \u001b[0mignores\u001b[0m \u001b[0mthem\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     95\u001b[0m         \"\"\"\n\u001b[1;32m---> 96\u001b[1;33m         \u001b[1;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     97\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     98\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mregister_buffer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtensor\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mNotImplementedError\u001b[0m: "
     ]
    }
   ],
   "source": [
    "# 整个网络如下\n",
    "\n",
    "import torch\n",
    "import torch.utils.data as Data\n",
    "from torch import nn\n",
    "import torch.optim as optim\n",
    "\n",
    "num_inputs = 2\n",
    "num_examples = 1000\n",
    "true_w = [2, -3.4]\n",
    "true_b = 4.2\n",
    "features = torch.tensor(np.random.normal(0, 1, (num_examples, num_inputs)), dtype=torch.float)\n",
    "labels = true_w[0] * features[:,0] + true_w[1] * features[:,1] + true_b\n",
    "labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()), dtype=torch.float)\n",
    "\n",
    "batch_size = 10\n",
    "dataset = Data.TensorDataset(features, labels)\n",
    "data_iter = Data.DataLoader(dataset, batch_size, shuffle=True)\n",
    "\n",
    "class LinearNet(nn.Module):\n",
    "    def __init__(self, n_feature):\n",
    "        super(LinearNet, self).__init__()\n",
    "        self.linear = nn.Linear(in_features=n_feature, out_features=1)\n",
    "    def forwar(self, x):\n",
    "        y = self.linear(x)\n",
    "        return y\n",
    "\n",
    "net = LinearNet(num_inputs)\n",
    "loss = nn.MSELoss()\n",
    "optimizer = optim.SGD(net.parameters(), lr=0.03)\n",
    "\n",
    "num_epochs = 3\n",
    "for epoch in range(1, num_epochs + 1):\n",
    "    for X,y in data_iter:\n",
    "        output = net(X)\n",
    "        l = loss(output, y.view(-1, 1))\n",
    "        optimizer.zero_grad() # 梯度清零，等价于net.zero_grad()\n",
    "        l.backward()\n",
    "        optimizer.step()\n",
    "    print('epoch % d, loss: %f' % (epoch, l.item()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "yczlab_3.6",
   "language": "python",
   "name": "yczlab_python3.6"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
